exp1.data <- read.csv("ProcessedData/Experiment1Data.csv")
#put Block in desired order
exp1.data$Block <- factor(exp1.data$Block,
levels=c("Training",
"Recall 1",
"Interaction Block 1",
"Interaction Block 2",
"Recall 2"))
#Required to avoid overlapping text in some plots
exp1.data$BlockNewlines <- plyr::revalue(exp1.data$Block,
c("Interaction Block 1"="Interaction\nBlock 1",
"Interaction Block 2"="Interaction\nBlock 2"))
#I want Pair as a factor
#Note: pairs 60-102 collected at Edinburgh, 151-190 collected at Warwick
exp1.data$Pair <- as.factor(exp1.data$Pair)
plyr::ddply(exp1.data,~Condition,plyr::summarise,Current_N=length(unique(Pair)))
## Condition Current_N
## 1 66-33 20
## 2 83-17 20
This is very high throughout.
exp1.by.pair.scores <- aggregate(Score~Condition+Block+Pair,data=subset(exp1.data,Block %in% c("Interaction Block 1", "Interaction Block 2")),FUN=sum)
aggregate(Score~Condition,data=exp1.by.pair.scores,FUN=mean)
## Condition Score
## 1 66-33 46.625
## 2 83-17 46.900
aggregate(Score~Block+Condition,data=exp1.by.pair.scores,FUN=mean)
## Block Condition Score
## 1 Interaction Block 1 66-33 46.65
## 2 Interaction Block 2 66-33 46.60
## 3 Interaction Block 1 83-17 46.55
## 4 Interaction Block 2 83-17 47.25
#We only want to analyse singulars
exp1.data.singulars <- subset(exp1.data,Number==1)
exp1.data.by.block <- aggregate(data=exp1.data.singulars,
Marked~Condition + Pair + Block + BlockNewlines + Participant + ParticipantID,
FUN=mean)
exp1.data.by.block <- plyr::rename(exp1.data.by.block,c("Marked"="ProportionMarkedSingulars"))
exp1.data.by.block$Condition <- plyr::revalue(exp1.data.by.block$Condition,
c("66-33"="Condition: 66-33",
"83-17"="Condition: 83-17"))
One graph for all raw data, another with one facet per pair, one with mean and 95% CIs.
#All data
ggplot(data=exp1.data.by.block, aes(x=BlockNewlines, y=ProportionMarkedSingulars, group=ParticipantID, colour=Pair,ymin=0,ymax=1)) +
theme_bw() +
facet_wrap(~Condition, ncol=4) +
geom_line() +
ylab("Proportion Marked Singulars") +
theme(axis.title.x = element_blank()) +
theme(legend.position="none") +
scale_y_continuous(breaks=seq(0,1,1/3),labels=c("0","1/3","2/3","1"))
#For faccetted by-pair plot, need to add a DummyPair column so that the facets work out nicely - this is a little clumsy, but simplest way to do this seems to be to split by condition and convert Pair factor to integers
exp1.data.by.block.6633 <- subset(exp1.data.by.block,Condition=='Condition: 66-33')
exp1.data.by.block.6633$DummyPair <- droplevels(exp1.data.by.block.6633$Pair)
#change levels to integers
levels(exp1.data.by.block.6633$DummyPair) <- 1:length(levels(exp1.data.by.block.6633$DummyPair))
#ditto for other condition
exp1.data.by.block.8317 <- subset(exp1.data.by.block,Condition=='Condition: 83-17')
exp1.data.by.block.8317$DummyPair <- droplevels(exp1.data.by.block.8317$Pair)
#change levels to integers
levels(exp1.data.by.block.8317$DummyPair) <- 1:length(levels(exp1.data.by.block.8317$DummyPair))
#and recombine
exp1.data.by.block.dummyp <- rbind(exp1.data.by.block.6633,exp1.data.by.block.8317)
#By-pair figure
ggplot(data=exp1.data.by.block.dummyp, aes(x=BlockNewlines, y=ProportionMarkedSingulars, group=ParticipantID, colour=Pair,ymin=0,ymax=1)) +
theme_bw() +
facet_grid(DummyPair ~ Condition) +
geom_line() +
ylab("Proportion Marked Singulars") +
theme(legend.position="none") +
scale_y_continuous(breaks=seq(0,1,1/3),labels=c("0","1/3","2/3","1")) +
theme(strip.text.y = element_blank()) +
theme(axis.title.x = element_blank())
#Means and CIs
ggplot(data=exp1.data.by.block, aes(x=Participant, y=ProportionMarkedSingulars, group=Participant, shape=Participant,fill=Participant)) +
theme_bw() +
facet_grid(Condition~Block) +
stat_summary(geom='bar', fun.y='mean', position='dodge',colour='black') +
stat_summary(geom='errorbar',fun.data='mean_cl_boot',fun.ymin="min", fun.ymax="max",colour="black",width=0.1) +
#stat_summary(geom='errorbar',colour="black",width=0.1) +
stat_summary(geom='point', fun.y='mean', position='dodge',colour='black') +
scale_fill_manual(values=c(my.colours[2],my.colours[1],my.colours[3])) +
coord_cartesian(ylim=c(0,1)) +
scale_y_continuous(breaks=seq(0,1,1/3),labels=c("0","1/3","2/3","1")) +
ylab("Proportion Marked Singulars") +
theme(axis.title.x = element_blank()) +
theme(legend.position="none")
systematic.users <- subset(exp1.data.by.block,Block=="Recall 1" & (ProportionMarkedSingulars==1 | ProportionMarkedSingulars==0))
plyr::ddply(systematic.users,~Condition,plyr::summarise,Current_N=length(unique(Pair)))
## Condition Current_N
## 1 Condition: 66-33 11
## 2 Condition: 83-17 13
systematic.users <- subset(exp1.data.by.block,Block=="Interaction Block 2" & (ProportionMarkedSingulars==1 | ProportionMarkedSingulars==0))
plyr::ddply(systematic.users,~Condition,plyr::summarise,Current_N=length(unique(Pair)))
## Condition Current_N
## 1 Condition: 66-33 17
## 2 Condition: 83-17 20
#In order to count convergence on systematic use/non-use, aggregating over participants in each pair
exp1.data.by.block.agg <- aggregate(ProportionMarkedSingulars~Condition+Pair+Block,data=exp1.data.by.block,FUN=mean)
systematic.user.pairs <- subset(exp1.data.by.block.agg,Block=="Interaction Block 2" & ProportionMarkedSingulars==1)
plyr::ddply(subset(systematic.user.pairs),~Condition,plyr::summarise,Current_N=length(unique(Pair)))
## Condition Current_N
## 1 Condition: 66-33 6
## 2 Condition: 83-17 5
systematic.nonuser.pairs <- subset(exp1.data.by.block.agg,Block=="Interaction Block 2" & ProportionMarkedSingulars==0)
plyr::ddply(subset(systematic.nonuser.pairs),~Condition,plyr::summarise,Current_N=length(unique(Pair)))
## Condition Current_N
## 1 Condition: 66-33 10
## 2 Condition: 83-17 10
We are primarily interested in how participants change their behaviour over time - since we can’t directly test interactions given the restrictions imposed by the nature of our data, we therefore measure change by calculating differences in singular marker use between timepoints. This involves calculating block-to-block differences in marker use by participant for contrasts of interest.
#The most convenient way to calculate differences from block to block is to restructure the data to wide format.
#BlockNewlines causes irritation here, so selecting it out
exp1.change.data <- tidyr::spread(data=dplyr::select(exp1.data.by.block,-BlockNewlines),Block,ProportionMarkedSingulars)
exp1.change.data$ChangeTrainingRecall1 <- exp1.change.data$'Recall 1' - exp1.change.data$'Training'
exp1.change.data$ChangeRecall1Interaction2 <- exp1.change.data$'Interaction Block 2'-exp1.change.data$'Recall 1'
exp1.change.data$ChangeRecall1Recall2 <- exp1.change.data$'Recall 2'-exp1.change.data$'Recall 1'
#convert back to long format for plotting
exp1.change.data.for.plotting <- dplyr::select(exp1.change.data,-Training,-'Recall 1',-'Recall 2',-'Interaction Block 1',-'Interaction Block 2')
exp1.change.data.for.plotting <- tidyr::gather(exp1.change.data.for.plotting,
BlockToBlock,Change,ChangeTrainingRecall1,ChangeRecall1Interaction2,ChangeRecall1Recall2)
#Make column names more user-friendly
exp1.change.data.for.plotting$BlockToBlock <- plyr::revalue(exp1.change.data.for.plotting$BlockToBlock,
c("ChangeTrainingRecall1"="Training to Recall 1",
"ChangeRecall1Interaction2"="Recall 1 to Interaction Block 2",
"ChangeRecall1Recall2"="Recall 1 to Recall 2"))
#I often want to plot Both Ps as well as P1 and P2 - to do this, I am duplicating the data and labelling the duplicate as "Both Ps" in the Participant column
exp1.change.data.for.plotting.Pscombined <- exp1.change.data.for.plotting
exp1.change.data.for.plotting.Pscombined$Participant <- "Both Ps"
exp1.change.data.for.plotting.combined <- rbind(exp1.change.data.for.plotting,exp1.change.data.for.plotting.Pscombined)
#Reorder the levels so that the BothPs data is last
exp1.change.data.for.plotting.combined$Participant <- factor(exp1.change.data.for.plotting.combined$Participant,levels=c("P1","P2","Both Ps"))
#I also want to show the effect for the two conditions combined, since there is often no difference between conditions - again, I duplicate the dataframe and label the Condition column for the combined dataframe as "Both Conditions"
exp1.change.data.for.plotting.Cscombined <- exp1.change.data.for.plotting.combined
exp1.change.data.for.plotting.Cscombined$Condition <- "Both Conditions"
exp1.change.data.for.plotting.combined <- rbind(exp1.change.data.for.plotting.combined,exp1.change.data.for.plotting.Cscombined)
#Reorder the levels so that the Both Conditions data is first
exp1.change.data.for.plotting.combined$Condition <- relevel(exp1.change.data.for.plotting.combined$Condition,ref=c("Both Conditions"))
#Reorder the levels ot BlockToBlock so that they reflect experiment order
exp1.change.data.for.plotting.combined$BlockToBlock <- factor(exp1.change.data.for.plotting.combined$BlockToBlock,levels=c("Training to Recall 1","Recall 1 to Interaction Block 2","Recall 1 to Recall 2"))
These change values allow us to test whether P1 and P2 differ in the extent to which they change, and whether the conditions differ in the extent to which participants change. In order to test for interactions (i.e. do P1 and P2 differ to the same extent in both conditions?) we calculate the difference between P1 and P2 within each pair (i.e. we take the P1 change in marker use, calculated above, and subtract the P2 change in marker use). We can then test for an interaction by comparing these difference scores across conditions - if P1 and P2 change to a similar extent in both conditions, then the P1-P2 difference scores will be similar across the two conditions; however, if P1 and P2 change to different extents across conditions, these P1-P2 difference scores will be different.
#The most convenient way to calculate differences is to restructure the data to wide format, organised by pair.
exp1.p1.p2.diffs.data <- tidyr::spread(data=dplyr::select(exp1.change.data.for.plotting,-ParticipantID),Participant,Change)
#now we simply calculate the difference in P1 and P2 change values.
exp1.p1.p2.diffs.data$P1P2Difference <- exp1.p1.p2.diffs.data$P1 - exp1.p1.p2.diffs.data$P2
ggplot(data=subset(exp1.change.data.for.plotting.combined,BlockToBlock=="Training to Recall 1"), aes(x=Participant, y=Change, fill=Participant)) +
theme_bw() +
facet_grid(. ~ Condition) +
geom_boxplot() +
scale_fill_manual(values=c(my.colours[2],my.colours[1],my.colours[3])) +
scale_y_continuous(limits = c(-1,1.1),breaks=seq(-1,1,1/3),labels=c("-1","-2/3","-1/3","0","+1/3","+2/3","+1")) +
ylab("Change") +
theme(axis.title.x = element_blank()) +
theme(legend.position="none") +
ggtitle("Training to Recall 1")
Q1a: do the conditions differ in change from training to recall 1?
ggplot(data=subset(exp1.change.data.for.plotting.combined,BlockToBlock=="Training to Recall 1" & Participant=="Both Ps"), aes(x=Participant, y=Change, fill=Participant)) +
theme_bw() +
facet_grid(. ~ Condition) +
geom_boxplot() +
scale_fill_manual(values=c(my.colours[3])) +
scale_y_continuous(limits = c(-1,1.1),breaks=seq(-1,1,1/3),labels=c("-1","-2/3","-1/3","0","+1/3","+2/3","+1")) +
ylab("Change") +
theme(axis.title.x = element_blank()) +
theme(legend.position="none") +
ggtitle("Training to Recall 1")
wilcox.test(subset(exp1.change.data,Condition=="Condition: 66-33")$ChangeTrainingRecall1,
subset(exp1.change.data,Condition=="Condition: 83-17")$ChangeTrainingRecall1)
##
## Wilcoxon rank sum test with continuity correction
##
## data: subset(exp1.change.data, Condition == "Condition: 66-33")$ChangeTrainingRecall1 and subset(exp1.change.data, Condition == "Condition: 83-17")$ChangeTrainingRecall1
## W = 964, p-value = 0.1151
## alternative hypothesis: true location shift is not equal to 0
No significant difference.
Q1b: do the Ps differ?
ggplot(data=subset(exp1.change.data.for.plotting.combined,BlockToBlock=="Training to Recall 1" & Participant!="Both Ps" & Condition=="Both Conditions"), aes(x=Participant, y=Change, fill=Participant)) +
theme_bw() +
facet_grid(. ~ Condition) +
geom_boxplot() +
scale_fill_manual(values=c(my.colours[2],my.colours[1])) +
scale_y_continuous(limits = c(-1,1.1),breaks=seq(-1,1,1/3),labels=c("-1","-2/3","-1/3","0","+1/3","+2/3","+1")) +
ylab("Change") +
theme(axis.title.x = element_blank()) +
theme(legend.position="none") +
ggtitle("Training to Recall 1")
Yes.
wilcox.test(subset(exp1.change.data,Participant=="P1")$ChangeTrainingRecall1,
subset(exp1.change.data,Participant=="P2")$ChangeTrainingRecall1)
##
## Wilcoxon rank sum test with continuity correction
##
## data: subset(exp1.change.data, Participant == "P1")$ChangeTrainingRecall1 and subset(exp1.change.data, Participant == "P2")$ChangeTrainingRecall1
## W = 1099.5, p-value = 0.00396
## alternative hypothesis: true location shift is not equal to 0
Q1c: does the difference between Ps differ across conditions?
ggplot(data=subset(exp1.p1.p2.diffs.data,BlockToBlock=="Training to Recall 1"), aes(x=Condition, y=P1P2Difference, fill=Condition)) +
theme_bw() +
geom_boxplot() +
expand_limits(y=1) +
scale_fill_manual(values=c(my.colours[2],my.colours[1],my.colours[3])) +
scale_y_continuous(breaks=seq(-1,1,1/3),labels=c("-1","-2/3","-1/3","0","+1/3","+2/3","+1")) +
ylab("Difference between P1 and P2") +
theme(axis.title.x = element_blank()) +
theme(legend.position="none") +
ggtitle("Training to Recall 1")
wilcox.test(subset(exp1.p1.p2.diffs.data,Condition=="Condition: 66-33" & BlockToBlock=="Training to Recall 1")$P1P2Difference,
subset(exp1.p1.p2.diffs.data,Condition=="Condition: 83-17" & BlockToBlock=="Training to Recall 1")$P1P2Difference)
##
## Wilcoxon rank sum test with continuity correction
##
## data: subset(exp1.p1.p2.diffs.data, Condition == "Condition: 66-33" & and subset(exp1.p1.p2.diffs.data, Condition == "Condition: 83-17" & BlockToBlock == "Training to Recall 1")$P1P2Difference and BlockToBlock == "Training to Recall 1")$P1P2Difference
## W = 207, p-value = 0.8604
## alternative hypothesis: true location shift is not equal to 0
No significant difference.
Q2a: Does the change from training differ from 0 (i.e. do participants on average probability match)?
wilcox.test(exp1.change.data$ChangeTrainingRecall1)
##
## Wilcoxon signed rank test with continuity correction
##
## data: exp1.change.data$ChangeTrainingRecall1
## V = 1643, p-value = 0.6113
## alternative hypothesis: true location is not equal to 0
No significant difference, i.e. this looks like probability matching overall.
Q2b: since the effect of P was significant, testing P1 and P2 seperately - there was no difference between conditions and no interaction between condition and P, so we collapse across conditions here.
Some evidence for a significant shift in both (marginal for P1s, clearer in P2s), i.e. some evidence that participants are drawn towards extremes.
wilcox.test(subset(exp1.change.data,Participant=="P1")$ChangeTrainingRecall1)
##
## Wilcoxon signed rank test with continuity correction
##
## data: subset(exp1.change.data, Participant == "P1")$ChangeTrainingRecall1
## V = 530, p-value = 0.05111
## alternative hypothesis: true location is not equal to 0
wilcox.test(subset(exp1.change.data,Participant=="P2")$ChangeTrainingRecall1)
##
## Wilcoxon signed rank test with continuity correction
##
## data: subset(exp1.change.data, Participant == "P2")$ChangeTrainingRecall1
## V = 240.5, p-value = 0.03729
## alternative hypothesis: true location is not equal to 0
ggplot(data=subset(exp1.change.data.for.plotting.combined,BlockToBlock=="Recall 1 to Interaction Block 2"), aes(x=Participant, y=Change, fill=Participant)) +
theme_bw() +
facet_grid(.~Condition) +
geom_boxplot() +
scale_fill_manual(values=c(my.colours[2],my.colours[1],my.colours[3])) +
scale_y_continuous(limits = c(-1,1.1),breaks=seq(-1,1,1/3),labels=c("-1","-2/3","-1/3","0","+1/3","+2/3","+1")) +
ylab("Change") +
theme(axis.title.x = element_blank()) +
theme(legend.position="none") +
ggtitle("Recall 1 To Interaction Block 2")
Q1a: do the conditions differ in change from recall 1 to interaction 2?
ggplot(data=subset(exp1.change.data.for.plotting.combined,BlockToBlock=="Recall 1 to Interaction Block 2" & Participant=="Both Ps"), aes(x=Participant, y=Change, fill=Participant)) +
theme_bw() +
facet_grid(. ~ Condition) +
geom_boxplot() +
scale_fill_manual(values=c(my.colours[3])) +
scale_y_continuous(limits = c(-1,1.1),breaks=seq(-1,1,1/3),labels=c("-1","-2/3","-1/3","0","+1/3","+2/3","+1")) +
ylab("Change") +
theme(axis.title.x = element_blank()) +
theme(legend.position="none") +
ggtitle("Recall 1 to Interaction Block 2")
wilcox.test(subset(exp1.change.data,Condition=="Condition: 66-33")$ChangeRecall1Interaction2,
subset(exp1.change.data,Condition=="Condition: 83-17")$ChangeRecall1Interaction2)
##
## Wilcoxon rank sum test with continuity correction
##
## data: subset(exp1.change.data, Condition == "Condition: 66-33")$ChangeRecall1Interaction2 and subset(exp1.change.data, Condition == "Condition: 83-17")$ChangeRecall1Interaction2
## W = 816.5, p-value = 0.8766
## alternative hypothesis: true location shift is not equal to 0
No significant difference.
Q1b: do the Ps differ?
ggplot(data=subset(exp1.change.data.for.plotting.combined,BlockToBlock=="Recall 1 to Interaction Block 2" & Participant!="Both Ps" & Condition=="Both Conditions"), aes(x=Participant, y=Change, fill=Participant)) +
theme_bw() +
facet_grid(. ~ Condition) +
geom_boxplot() +
scale_fill_manual(values=c(my.colours[2],my.colours[1])) +
scale_y_continuous(limits = c(-1,1.1),breaks=seq(-1,1,1/3),labels=c("-1","-2/3","-1/3","0","+1/3","+2/3","+1")) +
ylab("Change") +
theme(axis.title.x = element_blank()) +
theme(legend.position="none") +
ggtitle("Recall 1 to Interaction Block 2")
wilcox.test(subset(exp1.change.data,Participant=="P1")$ChangeRecall1Interaction2,
subset(exp1.change.data,Participant=="P2")$ChangeRecall1Interaction2)
##
## Wilcoxon rank sum test with continuity correction
##
## data: subset(exp1.change.data, Participant == "P1")$ChangeRecall1Interaction2 and subset(exp1.change.data, Participant == "P2")$ChangeRecall1Interaction2
## W = 531, p-value = 0.00918
## alternative hypothesis: true location shift is not equal to 0
Yes.
Q1c: does the difference between Ps differ across conditions?
ggplot(data=subset(exp1.p1.p2.diffs.data,BlockToBlock=="Recall 1 to Interaction Block 2"), aes(x=Condition, y=P1P2Difference, fill=Condition)) +
theme_bw() +
geom_boxplot() +
expand_limits(y=1) +
scale_fill_manual(values=c(my.colours[2],my.colours[1],my.colours[3])) +
scale_y_continuous(breaks=seq(-1,1,1/3),labels=c("-1","-2/3","-1/3","0","+1/3","+2/3","+1")) +
ylab("Difference between P1 and P2") +
theme(axis.title.x = element_blank()) +
theme(legend.position="none") +
ggtitle("Recall 1 to Interaction Block 2")
wilcox.test(subset(exp1.p1.p2.diffs.data,Condition=="Condition: 66-33" & BlockToBlock=="Recall 1 to Interaction Block 2")$P1P2Difference,
subset(exp1.p1.p2.diffs.data,Condition=="Condition: 83-17" & BlockToBlock=="Recall 1 to Interaction Block 2")$P1P2Difference)
##
## Wilcoxon rank sum test with continuity correction
##
## data: subset(exp1.p1.p2.diffs.data, Condition == "Condition: 66-33" & and subset(exp1.p1.p2.diffs.data, Condition == "Condition: 83-17" & BlockToBlock == "Recall 1 to Interaction Block 2")$P1P2Difference and BlockToBlock == "Recall 1 to Interaction Block 2")$P1P2Difference
## W = 277, p-value = 0.03839
## alternative hypothesis: true location shift is not equal to 0
Yes.
Q2a: Does the overall change from Recall 1 to Interaction 2 differ from 0?
wilcox.test(exp1.change.data$ChangeRecall1Interaction2)
##
## Wilcoxon signed rank test with continuity correction
##
## data: exp1.change.data$ChangeRecall1Interaction2
## V = 548.5, p-value = 0.007033
## alternative hypothesis: true location is not equal to 0
Yes.
Q2b: Since the effect of Participant was significant, test this seperately for P1 and P2.
P1 show an effect.
P2 do not.
This suggests that participant with the higher frequency of marker use in their training (the P1s) change more, and downwards, during interaction.
wilcox.test(subset(exp1.change.data,Participant=="P1")$ChangeRecall1Interaction2)
##
## Wilcoxon signed rank test with continuity correction
##
## data: subset(exp1.change.data, Participant == "P1")$ChangeRecall1Interaction2
## V = 33, p-value = 0.000184
## alternative hypothesis: true location is not equal to 0
wilcox.test(subset(exp1.change.data,Participant=="P2")$ChangeRecall1Interaction2)
##
## Wilcoxon signed rank test with continuity correction
##
## data: subset(exp1.change.data, Participant == "P2")$ChangeRecall1Interaction2
## V = 283, p-value = 0.9715
## alternative hypothesis: true location is not equal to 0
Q3a: Since Q1c suggests an interaction, need to consider the P1-P2 difference in each condition seperately. Firstly, do P1 and P2 differ in each condition considered seperately?
wilcox.test(subset(exp1.change.data,Condition=="Condition: 66-33" & Participant=="P1")$ChangeRecall1Interaction2,
subset(exp1.change.data,Condition=="Condition: 66-33" & Participant=="P2")$ChangeRecall1Interaction2)
##
## Wilcoxon rank sum test with continuity correction
##
## data: subset(exp1.change.data, Condition == "Condition: 66-33" & Participant == and subset(exp1.change.data, Condition == "Condition: 66-33" & Participant == "P1")$ChangeRecall1Interaction2 and "P2")$ChangeRecall1Interaction2
## W = 135.5, p-value = 0.08162
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(subset(exp1.change.data,Condition=="Condition: 83-17" & Participant=="P1")$ChangeRecall1Interaction2,
subset(exp1.change.data,Condition=="Condition: 83-17" & Participant=="P2")$ChangeRecall1Interaction2)
##
## Wilcoxon rank sum test with continuity correction
##
## data: subset(exp1.change.data, Condition == "Condition: 83-17" & Participant == and subset(exp1.change.data, Condition == "Condition: 83-17" & Participant == "P1")$ChangeRecall1Interaction2 and "P2")$ChangeRecall1Interaction2
## W = 130, p-value = 0.05706
## alternative hypothesis: true location shift is not equal to 0
They differ marginally in each condition considered seperately.
Q3b: And testing P1 and P2 seperately in each condition - do they show an overall reduction in marker use?
wilcox.test(subset(exp1.change.data,Condition=="Condition: 66-33" & Participant=="P1")$ChangeRecall1Interaction2)
##
## Wilcoxon signed rank test with continuity correction
##
## data: subset(exp1.change.data, Condition == "Condition: 66-33" & Participant == "P1")$ChangeRecall1Interaction2
## V = 6, p-value = 0.006403
## alternative hypothesis: true location is not equal to 0
wilcox.test(subset(exp1.change.data,Condition=="Condition: 66-33" & Participant=="P2")$ChangeRecall1Interaction2)
##
## Wilcoxon signed rank test with continuity correction
##
## data: subset(exp1.change.data, Condition == "Condition: 66-33" & Participant == "P2")$ChangeRecall1Interaction2
## V = 85, p-value = 1
## alternative hypothesis: true location is not equal to 0
wilcox.test(subset(exp1.change.data,Condition=="Condition: 83-17" & Participant=="P1")$ChangeRecall1Interaction2)
##
## Wilcoxon signed rank test with continuity correction
##
## data: subset(exp1.change.data, Condition == "Condition: 83-17" & Participant == "P1")$ChangeRecall1Interaction2
## V = 10, p-value = 0.008151
## alternative hypothesis: true location is not equal to 0
wilcox.test(subset(exp1.change.data,Condition=="Condition: 83-17" & Participant=="P2")$ChangeRecall1Interaction2)
##
## Wilcoxon signed rank test with continuity correction
##
## data: subset(exp1.change.data, Condition == "Condition: 83-17" & Participant == "P2")$ChangeRecall1Interaction2
## V = 54, p-value = 0.7537
## alternative hypothesis: true location is not equal to 0
In both conditions, P1s show a reduction in marker use, P2s do not.
Q3c: How about comparing P1 and P2 across conditions - do P1s in both conditions show a similar reduction, do P2s show a similar change?
wilcox.test(subset(exp1.change.data,Condition=="Condition: 66-33" & Participant=="P1")$ChangeRecall1Interaction2,
subset(exp1.change.data,Condition=="Condition: 83-17" & Participant=="P1")$ChangeRecall1Interaction2)
##
## Wilcoxon rank sum test with continuity correction
##
## data: subset(exp1.change.data, Condition == "Condition: 66-33" & Participant == and subset(exp1.change.data, Condition == "Condition: 83-17" & Participant == "P1")$ChangeRecall1Interaction2 and "P1")$ChangeRecall1Interaction2
## W = 230.5, p-value = 0.4083
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(subset(exp1.change.data,Condition=="Condition: 66-33" & Participant=="P2")$ChangeRecall1Interaction2,
subset(exp1.change.data,Condition=="Condition: 83-17" & Participant=="P2")$ChangeRecall1Interaction2)
##
## Wilcoxon rank sum test with continuity correction
##
## data: subset(exp1.change.data, Condition == "Condition: 66-33" & Participant == and subset(exp1.change.data, Condition == "Condition: 83-17" & Participant == "P2")$ChangeRecall1Interaction2 and "P2")$ChangeRecall1Interaction2
## W = 166.5, p-value = 0.3697
## alternative hypothesis: true location shift is not equal to 0
No significant difference - slightly hard to pin down the cause of the interaction, but presumably it resides in the answer to Q3a, i.e. the difference between P1 and P2 is somewhat clearer in the 83-17 condition than in the 66-33 condition.
ggplot(data=subset(exp1.change.data.for.plotting.combined,BlockToBlock=="Recall 1 to Recall 2"), aes(x=Participant, y=Change, fill=Participant)) +
theme_bw() +
facet_grid(.~Condition) +
geom_boxplot() +
scale_fill_manual(values=c(my.colours[2],my.colours[1],my.colours[3])) +
scale_y_continuous(limits = c(-1,1.1),breaks=seq(-1,1,1/3),labels=c("-1","-2/3","-1/3","0","+1/3","+2/3","+1")) +
ylab("Change") +
theme(axis.title.x = element_blank()) +
theme(legend.position="none") +
ggtitle("Recall 1 To Recall 2")
This shows the lasting effect of interaction.
Q1a: do the conditions differ in change from Recall 1 to Recall 2?
ggplot(data=subset(exp1.change.data.for.plotting.combined,BlockToBlock=="Recall 1 to Recall 2" & Participant=="Both Ps"), aes(x=Participant, y=Change, fill=Participant)) +
theme_bw() +
facet_grid(. ~ Condition) +
geom_boxplot() +
scale_fill_manual(values=c(my.colours[3])) +
scale_y_continuous(limits = c(-1,1.1),breaks=seq(-1,1,1/3),labels=c("-1","-2/3","-1/3","0","+1/3","+2/3","+1")) +
ylab("Change") +
theme(axis.title.x = element_blank()) +
theme(legend.position="none") +
ggtitle("Recall 1 to Recall 2")
wilcox.test(subset(exp1.change.data,Condition=="Condition: 66-33")$ChangeRecall1Recall2,
subset(exp1.change.data,Condition=="Condition: 83-17")$ChangeRecall1Recall2)
##
## Wilcoxon rank sum test with continuity correction
##
## data: subset(exp1.change.data, Condition == "Condition: 66-33")$ChangeRecall1Recall2 and subset(exp1.change.data, Condition == "Condition: 83-17")$ChangeRecall1Recall2
## W = 861.5, p-value = 0.5536
## alternative hypothesis: true location shift is not equal to 0
No significant difference.
Q1b: do the Ps differ?
ggplot(data=subset(exp1.change.data.for.plotting.combined,BlockToBlock=="Recall 1 to Recall 2" & Participant!="Both Ps" & Condition=="Both Conditions"), aes(x=Participant, y=Change, fill=Participant)) +
theme_bw() +
facet_grid(. ~ Condition) +
geom_boxplot() +
scale_fill_manual(values=c(my.colours[2],my.colours[1])) +
scale_y_continuous(limits = c(-1,1.1),breaks=seq(-1,1,1/3),labels=c("-1","-2/3","-1/3","0","+1/3","+2/3","+1")) +
ylab("Change") +
theme(axis.title.x = element_blank()) +
theme(legend.position="none") +
ggtitle("Recall 1 to Recall 2")
wilcox.test(subset(exp1.change.data,Participant=="P1")$ChangeRecall1Recall2,
subset(exp1.change.data,Participant=="P2")$ChangeRecall1Recall2)
##
## Wilcoxon rank sum test with continuity correction
##
## data: subset(exp1.change.data, Participant == "P1")$ChangeRecall1Recall2 and subset(exp1.change.data, Participant == "P2")$ChangeRecall1Recall2
## W = 609, p-value = 0.06429
## alternative hypothesis: true location shift is not equal to 0
Marginally.
Q1c: does the difference between Ps differ across conditions?
ggplot(data=subset(exp1.p1.p2.diffs.data,BlockToBlock=="Recall 1 to Recall 2"), aes(x=Condition, y=P1P2Difference, fill=Condition)) +
theme_bw() +
geom_boxplot() +
expand_limits(y=1) +
scale_fill_manual(values=c(my.colours[2],my.colours[1],my.colours[3])) +
scale_y_continuous(breaks=seq(-1,1,1/3),labels=c("-1","-2/3","-1/3","0","+1/3","+2/3","+1")) +
ylab("Difference between P1 and P2") +
theme(axis.title.x = element_blank()) +
theme(legend.position="none") +
ggtitle("Recall 1 to Recall 2")
wilcox.test(subset(exp1.p1.p2.diffs.data,Condition=="Condition: 66-33" & BlockToBlock=="Recall 1 to Recall 2")$P1P2Difference,
subset(exp1.p1.p2.diffs.data,Condition=="Condition: 83-17" & BlockToBlock=="Recall 1 to Recall 2")$P1P2Difference)
##
## Wilcoxon rank sum test with continuity correction
##
## data: subset(exp1.p1.p2.diffs.data, Condition == "Condition: 66-33" & and subset(exp1.p1.p2.diffs.data, Condition == "Condition: 83-17" & BlockToBlock == "Recall 1 to Recall 2")$P1P2Difference and BlockToBlock == "Recall 1 to Recall 2")$P1P2Difference
## W = 247, p-value = 0.2082
## alternative hypothesis: true location shift is not equal to 0
No significant difference.
Q2a: does the change from Recall 1 to Recall 2 differ from 0?
wilcox.test(exp1.change.data$ChangeRecall1Recall2)
##
## Wilcoxon signed rank test with continuity correction
##
## data: exp1.change.data$ChangeRecall1Recall2
## V = 542, p-value = 0.009711
## alternative hypothesis: true location is not equal to 0
Yes.
Q2b: since the participants differed in amount of change, test them separately - since there was no interaction between condition, we are collapsing across condition.
wilcox.test(subset(exp1.change.data,Participant=='P1')$ChangeRecall1Recall2)
##
## Wilcoxon signed rank test with continuity correction
##
## data: subset(exp1.change.data, Participant == "P1")$ChangeRecall1Recall2
## V = 61.5, p-value = 0.00132
## alternative hypothesis: true location is not equal to 0
wilcox.test(subset(exp1.change.data,Participant=='P2')$ChangeRecall1Recall2)
##
## Wilcoxon signed rank test with continuity correction
##
## data: subset(exp1.change.data, Participant == "P2")$ChangeRecall1Recall2
## V = 229.5, p-value = 0.724
## alternative hypothesis: true location is not equal to 0
P1s show an effect.
P2s do not.
This is consistent with a small downwards adjustment in P1s that outlasts interaction.
ggplot(data=subset(exp1.change.data.for.plotting.combined,BlockToBlock%in% c("Training to Recall 1","Recall 1 to Interaction Block 2","Recall 1 to Recall 2")), aes(x=Participant, y=Change, fill=Participant)) +
theme_bw() +
facet_grid(BlockToBlock~Condition) +
geom_boxplot() +
scale_fill_manual(values=c(my.colours[2],my.colours[1],my.colours[3])) +
scale_y_continuous(limits = c(-1,1.1),breaks=seq(-1,1,1/3),labels=c("-1","-2/3","-1/3","0","+1/3","+2/3","+1")) +
ylab("Change") +
theme(axis.title.x = element_blank()) +
theme(legend.position="none")
We can measure alignment using within-pair difference in marker usage - lower within-pair difference is indicative of higher alignment.
# The easiest way to do this is to build a wide-format table by subsetting based on P then merging
p1 <- dplyr::select(subset(exp1.data.by.block,Participant=="P1"),-Participant,-ParticipantID)
p2 <- dplyr::select(subset(exp1.data.by.block,Participant=="P2"),-Participant,-ParticipantID)
exp1.diffs <- merge(p1,p2,by=c('Condition','Pair','Block','BlockNewlines'))
#calculate absolute diffs
exp1.diffs$Difference <- abs(exp1.diffs$ProportionMarkedSingulars.x-exp1.diffs$ProportionMarkedSingulars.y)
#remove unnecessary columns, reorder Block again
exp1.diffs <- dplyr::select(exp1.diffs,-ProportionMarkedSingulars.x,-ProportionMarkedSingulars.y)
ggplot(data=exp1.diffs, aes(x=BlockNewlines, y=Difference, fill=Condition,ymin=0,ymax=1)) +
theme_bw() +
geom_boxplot() +
facet_grid(Condition~.) +
scale_fill_manual(values=c(my.colours[2],my.colours[1],my.colours[3])) +
scale_y_continuous(breaks=seq(0,1,1/3),labels=c("0","1/3","2/3","1")) +
ylab("Within-Pair Difference") +
theme(axis.title.x = element_blank()) +
theme(legend.position="none")
Also useful to show the change in within-pair differences over time.
#The most convenient way to calculate differences from block to block is to restructure the data to wide format.
exp1.diffs.change.data <- tidyr::spread(data=dplyr::select(exp1.diffs,-BlockNewlines),Block,Difference)
exp1.diffs.change.data$ChangeTrainingRecall1 <- exp1.diffs.change.data$'Recall 1' - exp1.diffs.change.data$'Training'
exp1.diffs.change.data$ChangeRecall1Interaction2 <- exp1.diffs.change.data$'Interaction Block 2'-exp1.diffs.change.data$'Recall 1'
exp1.diffs.change.data$ChangeRecall1Recall2 <- exp1.diffs.change.data$'Recall 2'-exp1.diffs.change.data$'Recall 1'
#convert back to long format for plotting
exp1.diffs.change.data.for.plotting <- dplyr::select(exp1.diffs.change.data,-Training,-'Recall 1',-'Recall 2',-'Interaction Block 1',-'Interaction Block 2')
exp1.diffs.change.data.for.plotting <- tidyr::gather(exp1.diffs.change.data.for.plotting,
BlockToBlock,DifferenceChange,ChangeTrainingRecall1,ChangeRecall1Interaction2,ChangeRecall1Recall2)
#Make column names more user-friendly
exp1.diffs.change.data.for.plotting$BlockToBlock <- plyr::revalue(exp1.diffs.change.data.for.plotting$BlockToBlock,
c("ChangeTrainingRecall1"="Training to Recall 1",
"ChangeRecall1Interaction2"="Recall 1 to Interaction Block 2",
"ChangeRecall1Recall2"="Recall 1 to Recall 2"))
#Now duplicate data and add in "Both Conditions" version, with all the data from both conditions, not seperated by Participant
exp1.diffs.change.data.for.plotting.Cscombined <- exp1.diffs.change.data.for.plotting
exp1.diffs.change.data.for.plotting.Cscombined$Condition <- "Both\nConditions"
exp1.diffs.change.data.for.plotting.combined <- rbind(exp1.diffs.change.data.for.plotting,exp1.diffs.change.data.for.plotting.Cscombined)
#reorder levels
#exp1.diffs.change.data.for.plotting.combined$Condition <- relevel(exp1.diffs.change.data.for.plotting.combined$Condition,ref="Both\nConditions")
ggplot(data=subset(exp1.diffs.change.data.for.plotting.combined,BlockToBlock=="Recall 1 to Interaction Block 2"), aes(x=Condition, y=DifferenceChange, fill=Condition)) +
theme_bw() +
geom_boxplot() +
expand_limits(y=1) +
scale_fill_manual(values=c(my.colours[2],my.colours[1],my.colours[3])) +
scale_y_continuous(breaks=seq(-1,1,1/3),labels=c("-1","-2/3","-1/3","0","+1/3","+2/3","+1")) +
ylab("Change In Within-Pair Difference") +
theme(axis.title.x = element_blank()) +
theme(legend.position="none") +
ggtitle("Recall 1 to Interaction Block 2")
Q1: do the conditions differ in change from recall 1 to interaction 2?
wilcox.test(subset(exp1.diffs.change.data,Condition=="Condition: 66-33")$ChangeRecall1Interaction2,
subset(exp1.diffs.change.data,Condition=="Condition: 83-17")$ChangeRecall1Interaction2)
##
## Wilcoxon rank sum test with continuity correction
##
## data: subset(exp1.diffs.change.data, Condition == "Condition: 66-33")$ChangeRecall1Interaction2 and subset(exp1.diffs.change.data, Condition == "Condition: 83-17")$ChangeRecall1Interaction2
## W = 265.5, p-value = 0.07852
## alternative hypothesis: true location shift is not equal to 0
Marginally.
Q2a: does the change from recall 1 to interaction 2 differ from 0?
wilcox.test(exp1.diffs.change.data$ChangeRecall1Interaction2)
##
## Wilcoxon signed rank test with continuity correction
##
## data: exp1.diffs.change.data$ChangeRecall1Interaction2
## V = 26.5, p-value = 4.051e-07
## alternative hypothesis: true location is not equal to 0
Yes.
Q2b: do the same seperately, by condition, since the conditions differ.
wilcox.test(subset(exp1.diffs.change.data,Condition=="Condition: 66-33")$ChangeRecall1Interaction2)
##
## Wilcoxon signed rank test with continuity correction
##
## data: subset(exp1.diffs.change.data, Condition == "Condition: 66-33")$ChangeRecall1Interaction2
## V = 1.5, p-value = 0.0001203
## alternative hypothesis: true location is not equal to 0
wilcox.test(subset(exp1.diffs.change.data,Condition=="Condition: 83-17")$ChangeRecall1Interaction2)
##
## Wilcoxon signed rank test with continuity correction
##
## data: subset(exp1.diffs.change.data, Condition == "Condition: 83-17")$ChangeRecall1Interaction2
## V = 11, p-value = 0.0007674
## alternative hypothesis: true location is not equal to 0
66-33 show the effect.
83-17 show the effect.
ggplot(data=subset(exp1.diffs.change.data.for.plotting.combined,BlockToBlock=="Recall 1 to Recall 2"), aes(x=Condition, y=DifferenceChange, fill=Condition)) +
theme_bw() +
geom_boxplot() +
expand_limits(y=1) +
scale_fill_manual(values=c(my.colours[2],my.colours[1],my.colours[3])) +
scale_y_continuous(breaks=seq(-1,1,1/3),labels=c("-1","-2/3","-1/3","0","+1/3","+2/3","+1")) +
ylab("Change In Within-Pair Difference") +
theme(axis.title.x = element_blank()) +
#theme(strip.text.y = element_blank()) +
theme(legend.position="none",plot.title = element_text(size=12,hjust=0.5)) +
ggtitle("Recall 1 to Recall 2")
This is another way of measuring lasting alignment.
Q1: do the conditions differ in change from recall 1 to recall 2?
wilcox.test(subset(exp1.diffs.change.data,Condition=="Condition: 66-33")$ChangeRecall1Recall2,
subset(exp1.diffs.change.data,Condition=="Condition: 83-17")$ChangeRecall1Recall2)
##
## Wilcoxon rank sum test with continuity correction
##
## data: subset(exp1.diffs.change.data, Condition == "Condition: 66-33")$ChangeRecall1Recall2 and subset(exp1.diffs.change.data, Condition == "Condition: 83-17")$ChangeRecall1Recall2
## W = 182.5, p-value = 0.6455
## alternative hypothesis: true location shift is not equal to 0
No significant difference.
Q2: does the change from recall 1 to recall 2 differ from 0?
wilcox.test(exp1.diffs.change.data$ChangeRecall1Recall2)
##
## Wilcoxon signed rank test with continuity correction
##
## data: exp1.diffs.change.data$ChangeRecall1Recall2
## V = 197, p-value = 0.007208
## alternative hypothesis: true location is not equal to 0
Yes.
Q3: As per reviewer request (not to be included in the paper, since the conditions don’t differ and differences in significance are not in themselves interpretable in the absence of a significant difference), check this per condition. Effect is significant for 66-33, n.s. for 83-17.
wilcox.test(subset(exp1.diffs.change.data,Condition=="Condition: 66-33")$ChangeRecall1Recall2)
##
## Wilcoxon signed rank test with continuity correction
##
## data: subset(exp1.diffs.change.data, Condition == "Condition: 66-33")$ChangeRecall1Recall2
## V = 43.5, p-value = 0.02266
## alternative hypothesis: true location is not equal to 0
wilcox.test(subset(exp1.diffs.change.data,Condition=="Condition: 83-17")$ChangeRecall1Recall2)
##
## Wilcoxon signed rank test with continuity correction
##
## data: subset(exp1.diffs.change.data, Condition == "Condition: 83-17")$ChangeRecall1Recall2
## V = 58, p-value = 0.1418
## alternative hypothesis: true location is not equal to 0
The most consistent way to show the change in within-pair difference from recall 1 to interaction 2 and recall 1 to recall 2 is a combined plot, using facet labels to identify the block.
#For this plot I need to use the shortened condition names to avoid problems with font size or the x-axis.
exp1.diffs.change.data.for.plotting.combined$ShortCondition <- plyr::revalue(exp1.diffs.change.data.for.plotting.combined$Condition,
c("Condition: 66-33"="66-33",
"Condition: 83-17"="83-17"))
ggplot(data=subset(exp1.diffs.change.data.for.plotting.combined,BlockToBlock %in% c("Recall 1 to Interaction Block 2","Recall 1 to Recall 2")), aes(x=ShortCondition, y=DifferenceChange, fill=ShortCondition)) +
theme_bw() +
facet_grid(.~BlockToBlock) +
geom_boxplot() +
expand_limits(y=1) +
scale_fill_manual(values=c(my.colours[2],my.colours[1],my.colours[3])) +
scale_y_continuous(breaks=seq(-1,1,1/3),labels=c("-1","-2/3","-1/3","0","+1/3","+2/3","+1")) +
ylab("Change In Within-Pair Difference") +
theme(axis.title.x = element_blank()) +
#theme(strip.text.y = element_blank()) +
theme(legend.position="none",plot.title = element_text(size=12,hjust=0.5))
One reviewer wanted to know if the reductions in within-pair difference we see here reflect alignment, or just the fact that everyone heads for zero marking and aligns coincidentally. We can test this in our data by seeing whether the observed changes in within-pair difference are greater than those we obtain if we randomly re-paired participants and re-calculated within-pair difference for these pseudo-pairs.
To do this we will calculate the mean within-pair difference at Interaction 2 for the veridical pairs, then calculate 1000 pseudo-pairings and evaluate how often these exhibit equal or lower mean within-pair difference. The random pseudo-pairings have to reflect the conditions and P1-P2 distinctions so as to avoid e.g. pairing a 66% participant with an 83% participant.
veridical.mean.wpd <- mean(subset(exp1.diffs,Block=="Interaction Block 2")$Difference)
#make a copy of the data to work with - we only need interaction block 2 data
exp1.data.by.block.pseudo <- dplyr::select(subset(exp1.data.by.block,Block=="Interaction Block 2"),-Block,-BlockNewlines)
#helper function to re-assign participants to pseudo-pairs - slightly laoborious
#in order to avoid mixing across conditions etc
reassign.pairs <- function(in.data) {
#subset data by block and participant
p1.6633 <- dplyr::select(subset(in.data,Participant=="P1" & Condition=="Condition: 66-33"),-Participant,-ParticipantID)
p2.6633 <- dplyr::select(subset(in.data,Participant=="P2" & Condition=="Condition: 66-33"),-Participant,-ParticipantID)
p1.8317 <- dplyr::select(subset(in.data,Participant=="P1" & Condition=="Condition: 83-17"),-Participant,-ParticipantID)
p2.8317 <- dplyr::select(subset(in.data,Participant=="P2" & Condition=="Condition: 83-17"),-Participant,-ParticipantID)
#randomise pair assignments
p1.6633$Pair <- sample(p1.6633$Pair)
p2.6633$Pair <- sample(p2.6633$Pair)
p1.8317$Pair <- sample(p1.8317$Pair)
p2.8317$Pair <- sample(p2.8317$Pair)
diffs.pseudo.6633 <- merge(p1.6633,p2.6633,by=c('Condition','Pair'))
diffs.pseudo.8317 <- merge(p1.8317,p2.8317,by=c('Condition','Pair'))
diffs.pseudo <- rbind(diffs.pseudo.6633,diffs.pseudo.8317)
#calculate absolute diffs
diffs.pseudo$Difference <-
abs(diffs.pseudo$ProportionMarkedSingulars.x-diffs.pseudo$ProportionMarkedSingulars.y)
#remove unnecessary columns, reorder Block again
diffs.pseudo <- dplyr::select(diffs.pseudo,-ProportionMarkedSingulars.x,-ProportionMarkedSingulars.y)
#calculate and return mean
mean(diffs.pseudo$Difference)
}
sampled.diffs <- replicate(1000,reassign.pairs(exp1.data.by.block.pseudo))
#report the veridfical within-pair difference
veridical.mean.wpd
## [1] 0.1121212
#report the mean of the sampled diffs
mean(sampled.diffs)
## [1] 0.4572348
#this is the number of cases where we see the same or smaller within-pair difference
sum(sampled.diffs <= veridical.mean.wpd)
## [1] 0
#this is the proportion of cases where we see the same or smaller within-pair difference
sum(sampled.diffs <= veridical.mean.wpd)/length(sampled.diffs)
## [1] 0